Load data

# Load the stores, items and prices tables from .rds files under data/.
# NOTE(review): redo_load() is a project helper defined elsewhere; presumably it
# binds each file's contents to the argument name (stores, items, prices), since
# those names are used below without any other assignment -- confirm.
redo_load(
  stores = here("data/stores.rds"),
  items  = here("data/items.rds"),
  prices = here("data/prices.rds")
)

Prices of items by store

# Reproducible spot check: draw 3 item ids, then 4 store ids, from a fixed seed.
set.seed(2542589)
ck_items = sample(items[["item_id"]], 3)
ck_stores = sample(stores[["store_id"]], 4)

# Narrow prices to the sampled items and then to the sampled stores, rewrite
# the ids as "name=value" labels, and pass price ~ d + store_id + item_id to
# plotf() with a line geom; ggplotly() wraps the resulting plot.
prices[item_id %in% ck_items][store_id %in% ck_stores] %>%
  as_tibble() %>%
  mutate(
    store_id = paste0("store_id=", store_id),
    item_id = paste0("item_id=", item_id)
  ) %>%
  plotf(price ~ d + store_id + item_id, geom = geom_line) %>%
  ggplotly()

Prices of items across stores in the same state

# Draw 3 item ids from a fixed seed so the plot is reproducible.
set.seed(2542589)
ck_items = sample(items[["item_id"]], 3)

# Left-join the sampled items' prices to each store's state_id and
# store_id_state (keyed on store_id), rewrite the ids as "name=value" labels,
# then pass price ~ d + store_id_state + state_id + item_id to plotf() with a
# line geom; ggplotly() wraps the resulting plot.
merge(
  prices[item_id %in% ck_items],
  stores[, c("store_id", "state_id", "store_id_state")],
  by = "store_id",
  all.x = TRUE
) %>%
  as_tibble() %>%
  mutate(
    item_id = paste0("item_id=", item_id),
    store_id = paste0("store_id=", store_id),
    state_id = paste0("state_id=", state_id),
    store_id_state = paste0("store_id_state=", store_id_state)
  ) %>%
  plotf(price ~ d + store_id_state + state_id + item_id, geom = geom_line) %>%
  ggplotly()

Prices by department

set.seed(2542589)

# Draw 30 item ids with replacement. Every row of items is weighted by Freq,
# the number of rows in items that share its (dept_id, cat_id) pair.
# NOTE(review): if items has one row per item, a whole group's chance of being
# drawn scales with Freq^2 -- confirm that favouring large groups is intended.
ck_items =
  items %>%
  lazy_dt() %>%
  count(dept_id, cat_id) %>%
  rename(Freq = n) %>%
  merge(items, all.y = TRUE) %>%  # no `by`: joins on the columns both sides share
  sample_n(30, replace = TRUE, weight = Freq) %>%
  pull(item_id)

# Prices of the sampled items with each item's dept_id and cat_id attached
# (left join on item_id, so every selected price row is kept).
prices2 =
  prices[item_id %in% ck_items] %>%
  merge(
    items[, c("item_id", "dept_id", "cat_id")],
    by = "item_id",
    all.x = TRUE
  )

# Box plots of price from prices2 against dept_id, against cat_id, and against
# both together. All three calls report the same number of dropped rows because
# they plot the same price column of the same table.
# NOTE(review): the dropped rows have a non-finite price (presumably NA) --
# confirm whether they should be filtered out before plotting.
plotf(prices2, price ~ dept_id, geom=geom_boxplot)
## Warning: Removed 109459 rows containing non-finite values (stat_boxplot).

plotf(prices2, price ~ cat_id, geom=geom_boxplot)
## Warning: Removed 109459 rows containing non-finite values (stat_boxplot).

plotf(prices2, price ~ dept_id + cat_id, geom=geom_boxplot)
## Warning: Removed 109459 rows containing non-finite values (stat_boxplot).